import nltk
import matplotlib

# Sample corpus: a flat list of already-tokenized words.
tokens = [
    'my', 'dog', 'has', 'flea', 'problems', 'help', 'please',
    'maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid',
    'my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him',
]

# Count how many times each token occurs.
freq = nltk.FreqDist(tokens)

# Print every distinct word with its count, formatted as "word:count".
for word, count in freq.items():
    print("{}:{}".format(word, count))

# Pull out the five most frequent words as (word, count) pairs and show them.
standard_freq = freq.most_common(5)
print(standard_freq)

# Draw the frequency chart: the first argument caps the plot at 20 samples,
# and cumulative=False plots per-word counts rather than a running total.
freq.plot(20, cumulative=False)